library(tidyverse)
## Warning: package 'dplyr' was built under R version 4.0.5
#read in all files
setwd("~/Desktop/Cluster/OOSEst/26Oct2023")
files <- list.files(pattern=".Rdata")
all_res <- target_res <- c()
for (i in 1:length(files)) {
load(file = files[i])
#all metrics
all_res_ind <- results$all_res
all_res <- bind_rows(all_res, all_res_ind)
#target metrics
target_res_ind <- results$target_res %>%
mutate(K = unique(all_res_ind$K),
n_mean = unique(all_res_ind$n_mean),
n_sd = unique(all_res_ind$n_sd),
covars_fix = unique(all_res_ind$covars_fix),
covars_rand = unique(all_res_ind$covars_rand),
lin = unique(all_res_ind$lin),
eps_study_m = unique(all_res_ind$eps_study_m),
eps_study_tau = unique(all_res_ind$eps_study_tau),
eps_study_inter = unique(all_res_ind$eps_study_inter),
distribution = unique(all_res_ind$distribution))
target_res <- bind_rows(target_res, target_res_ind)
print(i)
}
#checking output
seeds <- strsplit(files,"_") %>% map_chr(.f=function(x) x[2])
length(unique(seeds)) == length(files) #making sure every iteration has a unique seed
nrow(all_res) #to make sure every iteration ran
nrow(target_res)/800
setwd("~/Dropbox/Moderation/Out of Sample Prediction/Data")
saveRDS(all_res, "all_res_26Oct2023.RDS")
saveRDS(target_res, "target_res_26Oct2023.RDS")
all_res <- readRDS("Data/all_res_26Oct2023.RDS")
target_res <- readRDS("Data/target_res_26Oct2023.RDS")
#summarise across iterations
target_ind <- target_res %>%
group_by(id, W, sex, smstat, age, age2, weight, madrs, Y, tau, method,
K, n_mean, n_sd, covars_fix, covars_rand, lin, eps_study_m,
eps_study_tau, eps_study_inter, distribution) %>%
summarise(n_iter = n(),
coverage = mean(coverage),
bias = mean(bias),
abs_bias = mean(abs_bias),
length = mean(length),
significant = mean(significant)) %>%
mutate(scenario = paste(covars_fix, covars_rand, lin, eps_study_m,
eps_study_tau, eps_study_inter, sep = "_"),
sds = paste(eps_study_m, eps_study_tau, eps_study_inter, sep = "_"),
Linear = ifelse(lin == T, "Linear", "Non-Linear"),
sds_lab = factor(case_when(sds == "0.05_0.05_0.05" ~ "Low Heterogeneity",
sds == "1_0.05_0.05" ~ "Heterogeneous Intercept",
sds == "1_0.5_0.05" ~ "Heterogeneous Main",
sds == "1_1_0.5" ~ "High Heterogeneity"),
levels = c("Low Heterogeneity", "Heterogeneous Intercept",
"Heterogeneous Main", "High Heterogeneity")))
#average across target sample
target_sum <- target_ind %>%
group_by(method,
K, n_mean, n_sd, covars_fix, covars_rand, lin, eps_study_m,
eps_study_tau, eps_study_inter, distribution) %>%
summarise(across(coverage:significant, list(mean = mean, sd = sd))) %>%
mutate(scenario = paste(covars_fix, covars_rand, lin, eps_study_m,
eps_study_tau, eps_study_inter, sep = "_"),
sds = paste(eps_study_m, eps_study_tau, eps_study_inter, sep = "_"),
Linear = ifelse(lin == T, "Linear", "Non-Linear"),
sds_lab = factor(case_when(sds == "0.05_0.05_0.05" ~ "Low Heterogeneity",
sds == "1_0.05_0.05" ~ "Heterogeneous Intercept",
sds == "1_0.5_0.05" ~ "Heterogeneous Main",
sds == "1_1_0.5" ~ "High Heterogeneity"),
levels = c("Low Heterogeneity", "Heterogeneous Intercept",
"Heterogeneous Main", "High Heterogeneity")))
#long form for boxplot
all_long <- all_res %>%
rename(MA = manual_res, MA_Inc = manual_res_wrong,
HCF = cf_res, ACF = cf_a_res,
HCF_Rand = cf_res_rand, ACF_Rand = cf_a_res_rand,
SBART = sb_res, SBART_Rand = sb_res_rand) %>%
pivot_longer(cols = MA:SBART_Rand,
names_to = "Method",
values_to = "Value") %>%
mutate(scenario = paste(covars_fix, covars_rand, lin, eps_study_m,
eps_study_tau, eps_study_inter, sep = "_"),
sds = paste(eps_study_m, eps_study_tau, eps_study_inter, sep = "_"),
Dataset = factor(ifelse(grepl("target", Metric) == T,
"Target", "Training"),
levels = c("Training", "Target")),
Linear = ifelse(lin == T, "Linear", "Non-Linear"))
#average across iterations
all_sum <- all_res %>%
rename(MA = manual_res, MA_Inc = manual_res_wrong,
HCF = cf_res, ACF = cf_a_res,
HCF_Rand = cf_res_rand, ACF_Rand = cf_a_res_rand,
SBART = sb_res, SBART_Rand = sb_res_rand) %>%
group_by(Metric, K, n_mean, n_sd, covars_fix, covars_rand,
lin, eps_study_m, eps_study_tau, eps_study_inter,
distribution) %>%
summarise(across(MA:SBART_Rand, list(mean = mean, sd = sd)),
n_iter = n())
summary(all_sum$n_iter) #check iterations
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 500 500 500 500 500 500
#average in long form
all_avg_long <- all_res %>%
rename(MA = manual_res, MA_Inc = manual_res_wrong,
HCF = cf_res, ACF = cf_a_res,
HCF_Rand = cf_res_rand, ACF_Rand = cf_a_res_rand,
SBART = sb_res, SBART_Rand = sb_res_rand) %>%
group_by(Metric, K, n_mean, n_sd, covars_fix, covars_rand,
lin, eps_study_m, eps_study_tau, eps_study_inter,
distribution) %>%
summarise(across(MA:SBART_Rand, mean)) %>%
pivot_longer(cols = MA:SBART_Rand,
names_to = "Method",
values_to = "Value") %>%
mutate(scenario = paste(covars_fix, covars_rand, lin, eps_study_m,
eps_study_tau, eps_study_inter, sep = "_"),
sds = paste(eps_study_m, eps_study_tau, eps_study_inter, sep = "_"))
#boxplot across 100 people in target sample
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
ggplot(aes(x=method)) +
geom_boxplot(aes(y=coverage)) +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
facet_grid(Linear ~ sds_lab, scales = "free") +
scale_y_continuous(labels = scales::percent) +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=15)) +
ylab("Coverage in Target Sample") +
xlab("")
#ggsave("Results/26Oct2023/coverage.jpeg")
#for job talk
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("HCF", "MA", "SBART")) %>%
mutate(method = factor(method, levels = c("MA", "HCF", "SBART"))) %>%
ggplot(aes(x=method)) +
geom_boxplot(aes(y=coverage)) +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
facet_grid(Linear ~ sds_lab, scales = "free") +
scale_y_continuous(labels = scales::percent) +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
ylab("Coverage in Target Sample") +
xlab("")
#average across all iterations
target_sum %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
ggplot(aes(x=method)) +
geom_point(aes(y=coverage_mean)) +
# geom_errorbar(aes(ymin = coverage_mean - 1.96*coverage_sd,
# ymax = min(coverage_mean + 1.96*coverage_sd, 1),
# color=method)) +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
facet_grid(Linear ~ sds) +
scale_y_continuous(labels = scales::percent) +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
ylab("Coverage in Target Sample")
#coverage by age - all
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("HCF", "MA", "SBART")) %>%
ggplot(aes(x=age, y=coverage, color=method)) +
geom_smooth(se=F) +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
scale_y_continuous(labels = scales::percent) +
facet_grid(Linear ~ sds_lab, scales = "free")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#points
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("HCF", "MA", "SBART")) %>%
ggplot(aes(x=age, y=coverage, color=method)) +
geom_point() +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
scale_y_continuous(labels = scales::percent) +
facet_grid(Linear ~ sds_lab, scales = "free")
#coverage by madrs - all
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("HCF", "MA", "SBART")) %>%
ggplot(aes(x=madrs, y=coverage, color=method)) +
geom_smooth(se=F) +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
scale_y_continuous(labels = scales::percent) +
facet_grid(Linear ~ sds_lab, scales = "free")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#points
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("HCF", "MA", "SBART")) %>%
ggplot(aes(x=madrs, y=coverage, color=method)) +
geom_point() +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
scale_y_continuous(labels = scales::percent) +
facet_grid(Linear ~ sds_lab, scales = "free")
#coverage by age - HCF
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method == "HCF") %>%
ggplot(aes(x=age, y=coverage)) +
geom_point() +
scale_y_continuous(labels = scales::percent) +
facet_grid(Linear ~ sds_lab, scales = "free") +
ggtitle("Results Using HCF")
#coverage by madrs - HCF
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method == "HCF") %>%
ggplot(aes(x=madrs, y=coverage)) +
geom_point() +
scale_y_continuous(labels = scales::percent) +
facet_grid(Linear ~ sds_lab, scales = "free") +
ggtitle("Results Using HCF")
#coverage by age - SBART
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method == "SBART") %>%
ggplot(aes(x=age, y=coverage)) +
geom_point() +
scale_y_continuous(labels = scales::percent) +
facet_grid(Linear ~ sds_lab, scales = "free") +
ggtitle("Results Using SBART")
#boxplot across 100 people in target sample
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
ggplot(aes(x=method)) +
geom_boxplot(aes(y=length)) +
facet_grid(Linear ~ sds_lab, scales = "free") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
ylab("Interval Length in Target Sample") +
xlab("")
#ggsave("Results/26Oct2023/length.jpeg")
#average across all iterations
target_sum %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
ggplot(aes(x=method)) +
geom_point(aes(y=length_mean)) +
# geom_errorbar(aes(ymin = coverage_mean - 1.96*coverage_sd,
# ymax = min(coverage_mean + 1.96*coverage_sd, 1),
# color=method)) +
facet_grid(Linear ~ sds) +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
ylab("CI Length in Target Sample")
#length by age - all
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("HCF", "MA", "SBART")) %>%
ggplot(aes(x=age, y=length, color=method)) +
geom_smooth(se=F) +
facet_grid(Linear ~ sds_lab, scales = "free")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#points
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("HCF", "MA", "SBART")) %>%
ggplot(aes(x=age, y=length, color=method)) +
geom_point() +
facet_grid(Linear ~ sds_lab, scales = "free")
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
mutate(sds_lab = factor(case_when(sds == "0.05_0.05_0.05" ~ "Low Heterogeneity",
sds == "1_0.05_0.05" ~ "Heterogeneous Intercept",
sds == "1_0.5_0.05" ~ "Heterogeneous Main",
sds == "1_1_0.5" ~ "High Heterogeneity"),
levels = c("Low Heterogeneity", "Heterogeneous Intercept",
"Heterogeneous Main", "High Heterogeneity"))) %>%
ggplot(aes(x=method)) +
geom_boxplot(aes(y=abs_bias)) +
facet_grid(Linear ~ sds_lab, scales = "free") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
ylab("Absolute Bias in Target Sample") +
xlab("")
ggsave("Results/26Oct2023/abs_bias.jpeg")
## Saving 10 x 7 in image
#for job talk
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("HCF", "MA", "SBART")) %>%
mutate(method = factor(method, levels = c("MA", "HCF", "SBART")),
sds_lab = factor(case_when(sds == "0.05_0.05_0.05" ~ "Low Heterogeneity",
sds == "1_0.05_0.05" ~ "Heterogeneous Intercept",
sds == "1_0.5_0.05" ~ "Heterogeneous Main",
sds == "1_1_0.5" ~ "High Heterogeneity"), levels = c("Low Heterogeneity", "Heterogeneous Intercept", "Heterogeneous Main", "High Heterogeneity"))) %>%
ggplot(aes(x=method)) +
geom_boxplot(aes(y=abs_bias)) +
facet_grid(Linear ~ sds_lab, scales = "free") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=15)) +
ylab("Absolute Bias in Target Sample") +
xlab("")
#bias by age - all
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("HCF", "MA", "SBART")) %>%
ggplot(aes(x=age, y=bias, color=method)) +
geom_smooth(se=F) +
facet_grid(Linear ~ sds_lab, scales = "free")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#points
target_ind %>%
filter(covars_fix == "age",
distribution == "same",
method %in% c("HCF", "MA", "SBART")) %>%
ggplot(aes(x=age, y=bias, color=method)) +
geom_point() +
facet_grid(Linear ~ sds_lab, scales = "free")
#boxplot across 100 people in target sample
target_ind %>%
filter(covars_fix == "age",
distribution == "varying_madrs",
method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
ggplot(aes(x=method, group=method)) +
geom_boxplot(aes(y=coverage, color=method)) +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
facet_grid(Linear ~ sds, scales = "free") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
ylab("Coverage in Target Sample")
#average across all iterations
target_sum %>%
filter(covars_fix == "age",
distribution == "varying_madrs",
method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
ggplot(aes(x=method)) +
geom_point(aes(y=coverage_mean)) +
# geom_errorbar(aes(ymin = coverage_mean - 1.96*coverage_sd,
# ymax = min(coverage_mean + 1.96*coverage_sd, 1),
# color=method)) +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
facet_grid(Linear ~ sds) +
scale_y_continuous(labels = scales::percent) +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
ylab("Coverage in Target Sample")
#coverage by age - HCF
target_ind %>%
filter(covars_fix == "age",
distribution == "varying_madrs",
method == "HCF") %>%
ggplot(aes(x=age, y=coverage)) +
geom_point() +
scale_y_continuous(labels = scales::percent) +
facet_grid(Linear ~ sds_lab, scales = "free") +
ggtitle("Results Using HCF")
#covarage by madrs - HCF
target_ind %>%
filter(covars_fix == "age",
distribution == "varying_madrs",
method == "HCF") %>%
ggplot(aes(x=madrs, y=coverage)) +
geom_point() +
scale_y_continuous(labels = scales::percent) +
facet_grid(Linear ~ sds_lab, scales = "free") +
ggtitle("Results Using HCF")
#boxplot across 100 people in target sample
target_ind %>%
filter(covars_fix == "age",
distribution == "separate_age",
method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
ggplot(aes(x=method, group=method)) +
geom_boxplot(aes(y=coverage, color=method)) +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
facet_grid(Linear ~ sds, scales = "free") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
ylab("Coverage in Target Sample")
#boxplot across 100 people in target sample
target_ind %>%
filter(covars_fix == "age, madrs",
distribution == "same",
method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
ggplot(aes(x=method, group=method)) +
geom_boxplot(aes(y=coverage, color=method)) +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
facet_grid(Linear ~ sds, scales = "free") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
ylab("Coverage in Target Sample")
#average across all iterations
target_sum %>%
filter(covars_fix == "age, madrs",
distribution == "same",
method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
ggplot(aes(x=method)) +
geom_point(aes(y=coverage_mean)) +
# geom_errorbar(aes(ymin = coverage_mean - 1.96*coverage_sd,
# ymax = min(coverage_mean + 1.96*coverage_sd, 1),
# color=method)) +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
facet_grid(Linear ~ sds) +
scale_y_continuous(labels = scales::percent) +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
ylab("Coverage in Target Sample")
#coverage by age - HCF
target_ind %>%
filter(covars_fix == "age, madrs",
distribution == "same",
method == "HCF") %>%
ggplot(aes(x=age, y=coverage)) +
geom_point() +
scale_y_continuous(labels = scales::percent) +
facet_grid(Linear ~ sds, scales = "free") +
ggtitle("Results Using HCF")
#coverage by madrs - HCF
target_ind %>%
filter(covars_fix == "age, madrs",
distribution == "same",
method == "HCF") %>%
ggplot(aes(x=madrs, y=coverage)) +
geom_point() +
scale_y_continuous(labels = scales::percent) +
facet_grid(Linear ~ sds, scales = "free") +
ggtitle("Results Using HCF")
#boxplot across 100 people in target sample
target_ind %>%
filter(covars_fix == "age, madrs",
distribution == "varying_madrs",
method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
ggplot(aes(x=method, group=method)) +
geom_boxplot(aes(y=coverage, color=method)) +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
facet_grid(Linear ~ sds, scales = "free") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
ylab("Coverage in Target Sample")
#boxplot across 100 people in target sample
target_ind %>%
filter(covars_fix == "age, madrs",
distribution == "separate_age",
method %in% c("ACF", "HCF", "MA", "MA_Inc", "SBART")) %>%
ggplot(aes(x=method, group=method)) +
geom_boxplot(aes(y=coverage, color=method)) +
geom_hline(aes(yintercept = 0.95), linetype = "dashed") +
facet_grid(Linear ~ sds, scales = "free") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
ylab("Coverage in Target Sample")
For now, just using the setup where age is the fixed and random moderator, the training distributions are the same across trials, and the target distribution is the same as the training.
#age, same target distribution
#mse
all_long %>%
filter(Method %in% c("MA","MA_Inc"), covars_fix == "age",
distribution == "same",
grepl("mse", Metric) == T,
sds != "1_1_0.5") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
facet_grid(Dataset~Linear, scales = "free") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("MSE")
#ggsave("Results/19Oct2023/MA_MSE_19Oct2023.jpeg")
#coverage
all_long %>%
filter(Method %in% c("MA","MA_Inc"), covars_fix == "age",
distribution == "same",
grepl("coverage", Metric) == T,
sds != "1_1_0.5") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
geom_hline(aes(yintercept=.95), linetype = "dashed") +
facet_grid(Dataset~Linear, scales = "free") +
scale_y_continuous(labels = scales::percent) +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("CI Coverage Percent")
#ggsave("Results/19Oct2023/MA_Cov_19Oct2023.jpeg")
#age, same target distribution
#mse
all_long %>%
filter(Method %in% c("HCF","ACF","HCF_Rand","ACF_Rand"), covars_fix == "age",
distribution == "same",
grepl("mse", Metric) == T,
sds != "1_1_0.5") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
facet_grid(Dataset~Linear, scales = "free") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("MSE")
#ggsave("Results/19Oct2023/CF_MSE_19Oct2023.jpeg")
#coverage
all_long %>%
filter(Method %in% c("HCF","ACF","HCF_Rand","ACF_Rand"), covars_fix == "age",
distribution == "same",
grepl("coverage", Metric) == T,
sds != "1_1_0.5") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
geom_hline(aes(yintercept=.95), linetype = "dashed") +
facet_grid(Dataset~Linear, scales = "free") +
scale_y_continuous(labels = scales::percent) +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("CI Coverage Percent")
#ggsave("Results/19Oct2023/CF_Cov_19Oct2023.jpeg")
#age, same target distribution
#mse
all_long %>%
filter(Method %in% c("SBART","SBART_Rand"), covars_fix == "age",
distribution == "same",
grepl("mse", Metric) == T,
sds != "1_1_0.5") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
facet_grid(Dataset~Linear, scales = "free") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("MSE")
#ggsave("Results/19Oct2023/BART_MSE_19Oct2023.jpeg")
#coverage
all_long %>%
filter(Method %in% c("SBART","SBART_Rand"), covars_fix == "age",
distribution == "same",
grepl("coverage", Metric) == T,
sds != "1_1_0.5") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
geom_hline(aes(yintercept=.95), linetype = "dashed") +
facet_grid(Dataset~Linear, scales = "free") +
scale_y_continuous(labels = scales::percent) +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("CI Coverage Percent")
#ggsave("Results/19Oct2023/BART_Cov_19Oct2023.jpeg")
#age, same target distribution
all_long %>%
filter(Metric == "target_coverage", covars_fix == "age",
distribution == "same") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
geom_hline(aes(yintercept=.95), linetype = "dashed") +
facet_wrap(~lin) +
scale_y_continuous(labels = scales::percent, limits = c(.75, 1)) +
labs(color="Method") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("CI Coverage Percent in Target Sample") +
ggtitle("Same Target Distribution")
## Warning: Removed 819 rows containing non-finite values (stat_boxplot).
# points version
all_avg_long %>%
filter(Metric == "target_coverage", covars_fix == "age",
distribution == "same") %>%
ggplot(aes(x=sds, y=Value)) +
geom_point(aes(color = Method)) +
geom_hline(aes(yintercept=.95), linetype = "dashed") +
facet_wrap(~lin) +
scale_y_continuous(labels = scales::percent, limits = c(.75, 1)) +
labs(color="Method") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("CI Coverage Percent in Target Sample") +
ggtitle("Same Target Distribution")
## Warning: Removed 2 rows containing missing values (geom_point).
#age, same target distribution
all_long %>%
filter(Metric == "train_coverage", covars_fix == "age",
distribution == "same") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
geom_hline(aes(yintercept=.95), linetype = "dashed") +
facet_wrap(~lin) +
scale_y_continuous(labels = scales::percent, limits = c(.75, 1)) +
labs(color="Method") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("CI Coverage Percent in Training Sample") +
ggtitle("Same Target Distribution")
## Warning: Removed 11450 rows containing non-finite values (stat_boxplot).
#age, same target distribution
all_long %>%
filter(Metric == "target_bias", covars_fix == "age",
distribution == "same") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
facet_wrap(~lin) +
labs(color="Method") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("Mean Absolute Bias in Target Sample") +
ggtitle("Same Target Distribution")
#age, same target distribution
all_long %>%
filter(Metric == "train_bias", covars_fix == "age",
distribution == "same") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
facet_wrap(~lin) +
labs(color="Method") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("Mean Absolute Bias in Training Sample") +
ggtitle("Same Target Distribution")
#age, same target distribution
all_long %>%
filter(Metric == "target_length", covars_fix == "age",
distribution == "same") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
facet_wrap(~lin) + ylim(0,10) +
labs(color="Method") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("CI Length in Target Sample") +
ggtitle("Same Target Distribution")
## Warning: Removed 1067 rows containing non-finite values (stat_boxplot).
#age, same target distribution
all_long %>%
filter(Metric == "target_coverage", covars_fix == "age") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
geom_hline(aes(yintercept=.95), linetype = "dashed") +
facet_grid(lin~distribution) +
scale_y_continuous(labels = scales::percent, limits = c(.75, 1)) +
labs(color="Method") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("CI Coverage Percent in Target Sample") +
ggtitle("Same Target Distribution")
## Warning: Removed 1818 rows containing non-finite values (stat_boxplot).
all_long %>%
filter(Metric == "target_coverage", covars_fix == "age, madrs") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
geom_hline(aes(yintercept=.95), linetype = "dashed") +
facet_grid(lin~distribution) +
scale_y_continuous(labels = scales::percent, limits = c(.75, 1)) +
labs(color="Method") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("CI Coverage Percent in Target Sample") +
ggtitle("Same Target Distribution, Age and MADRS")
## Warning: Removed 258 rows containing non-finite values (stat_boxplot).
#age, same target distribution
all_long %>%
filter(Metric == "train_coverage", covars_fix == "age") %>%
ggplot(aes(x=sds, y=Value)) +
geom_boxplot(aes(color = Method)) +
geom_hline(aes(yintercept=.95), linetype = "dashed") +
facet_grid(lin~distribution) +
scale_y_continuous(labels = scales::percent, limits = c(.75, 1)) +
labs(color="Method") +
theme(axis.text.x = element_text(angle=90),
text = element_text(size=12)) +
xlab("Variability of Main, Trt, and Trt*Age Coefficients") +
ylab("CI Coverage Percent in Training Sample") +
ggtitle("Same Target Distribution")
## Warning: Removed 33496 rows containing non-finite values (stat_boxplot).